/*NOTE: This is model code to illustrate the logic used to generate the datasets for analysis in the SCARP pilot.
This code is not intended to run as is - due to Epic restrictions on the sharing of intellectual property all 
of the table and attribute names have been "masked" with names enclosed in square brackets if they were pulled 
from Clarity (EPIC).*/

/*****************************************************
PROJECT:  BRIDGE C2
PILOT: SCRAP
DATE: 8/xx/2022
9/6/2022 Correction made:
  from if screen_date<'29FEB2020'd then delete;
	to if screen_date>'29FEB2020'd then delete;

DESCRIPTION: Use ASCEND approach to compare CRC, CVC and 
BC cancer prevention services in patients with
	No SDH Screening
	Documented Need
	Documented No Need
Stratified by SDH need: Food, Housing, Transportation
STUDY PERIOD: 7/1/2016-2/29/2020		

DETAILS (this syntax): 
Create files of order dates,screen dates and analysis file 

Output file: 
scrap.CVC_order_dates_new
SCRAP.CVC_analysis_09122023

*****************************************************/;
options compress=yes reuse=yes;
libname RD odbc noprompt="dsn=research_dev;Trusted_Connection=yes" bulkload=yes;
libname SCRAP "e:\\sasroot\BRIDGE C2\SCRAP\";


proc sql;
create table rd.jpo_cvc_cohort as select* from scrap.scrap_CVC_patients;
 
proc sql; create table RD.jpo_CVC_screens_lu as select * from scrap.CVC_screens_LU; quit;


*screening orders and results from [ORDER PROCEDURE];
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table CVC_orders as 
select * from connection to mycon (

SELECT distinct
	den.raw_patid
    ,op.[ORDERING DATE]
	,op.[result time]
	,op.[procedure identifier]
	,op.[order status]
	,ppc.[screen]
	,op.[pending order yes/no]
	,op.[ORDER PROCEDURE IDENTIFIER]
	,op.[abnormal yes/no]
	,op.[display name]
	,op.[ORDERING DATE]
	,op.[result time] 	
FROM
    research_dev.dbo.jpo_CVC_cohort  AS den
		inner join Clarity.dbo.[ORDER PROCEDURE] as op
			on den.raw_patid = op.[patient identifier] 
        inner join research_dev.dbo.jpo_CVC_screens_lu AS ppc
      ON op.[procedure identifier] = ppc.proc_id);
quit;
*3=resulted 5=completed;
*2=sent ;
*4=canceled ;

data CVC_orders2 ;
set CVC_orders;
length source $20;
Source='Order proc';
format order_date date9.;
order_date=datepart([ORDERING DATE]);
where [order status]^=4;
if order_date<'01JUL2011'd then delete;
if order_date>'29FEB2020'd then delete;
run;

data CVC_screen_procs;
set CVC_orders;
format screen_date date9.;
screen_date=datepart([result time]);
where [result time]^=.;
if screen_date<'01JUL2011'd then delete;
if screen_date>'29FEB2020'd then delete;
run;


proc sql;
connect to odbc as mycon (required='dsn=research_dev;Trusted_Connection=yes');                                                             
create table clarity_LAB_components as select *
from connection to mycon (
select distinct pt.raw_patid,  
	r.[component identifier], 	
	zr.[result status], 
	zo.[order status],
	r.[ORDER PROCEDURE IDENTIFIER], 
	e.[procedure code], 
	e.[procedure name], 
	o.[ORDERING DATE], 
	r.[result time], 
	r.[order value], 
	r.[order number value], 
	r.[result flag], 
	zc.[result flag name],
	o.[patient encounter contact serial number identifier], 
	cc.[default lnc identifier],
	cc.[external name],
	ppc.screen
from CLARITY.dbo.[ORDER RESULTS] r
inner join CLARITY.dbo.[ORDER PROCEDURE] o on o.[ORDER PROCEDURE IDENTIFIER]=r.[ORDER PROCEDURE IDENTIFIER]
inner join research_dev.dbo.jpo_CVC_cohort  pt on pt.raw_patid=o.[patient identifier]
left join CLARITY.dbo.[LAB STATUS] zr on zr.[lab status]=r.[lab status]
left join CLARITY.dbo.[ORDER STATUS] zo on zo.[order status]=o.[order status] 
left join clarity.dbo.[result flag] zc on r.[result flag] = zc.[result flag]
left join CLARITY.dbo.[procedures records EAP] e on e.[procedure identifier]=o.[procedure identifier]
left join clarity.dbo.[component] cc on cc.[component identifier]=r.[component identifier]
inner join research_dev.dbo.jpo_CVC_screens_lu AS ppc
on ppc.[component identifier]=r.[component identifier]
where ppc.[component identifier] is not null);

data clarity_LAB_components;
set  clarity_LAB_components; 
length source $20;
format order_date screen_date date9.;
Source='Lab Component';
order_date=datepart([ORDERING DATE]); 
screen_date=datepart([result time]);
if screen_date<'01JUL2011'd then delete;
if screen_date>'29FEB2020'd then delete;
run;


proc sql;
connect to odbc as mycon (required='dsn=research_dev;Trusted_Connection=yes');                                                             
create table clarity_LAB_LNC_cc as select *
from connection to mycon (
select distinct pt.raw_patid,  
	r.[component identifier], 	
	zr.[result status], 
	zo.[order status],
	r.[ORDER PROCEDURE IDENTIFIER], 
	e.[procedure code], 
	e.[procedure name], 
	o.[ORDERING DATE], 
	r.[result time], 
	r.[order value], 
	r.[order number value], 
	r.[result flag], 
	zc.[result flag name],
	o.[patient encounter contact serial number identifier], 
	cc.[default lnc identifier],
	r.[component lnc identifier],
	ppc.screen
from CLARITY.dbo.[ORDER RESULTS] r
inner join CLARITY.dbo.[ORDER PROCEDURE] o on o.[ORDER PROCEDURE IDENTIFIER]=r.[ORDER PROCEDURE IDENTIFIER]
inner join research_dev.dbo.jpo_CVC_cohort  pt on pt.raw_patid=o.[patient identifier]
left join CLARITY.dbo.[LAB STATUS] zr on zr.[lab status]=r.[lab status]
left join CLARITY.dbo.[ORDER STATUS] zo on zo.[order status]=o.[order status] 
left join clarity.dbo.[result flag] zc on r.[result flag] = zc.[result flag]
left join CLARITY.dbo.[procedures records EAP] e on e.[procedure identifier]=o.[procedure identifier]
left join clarity.dbo.[component] cc on cc.[component identifier]=r.[component identifier]
inner join research_dev.dbo.jpo_CVC_screens_lu AS ppc
on ppc.LOinc_code=cc.[default lnc identifier] 
where ppc.loinc_code is not null);

data clarity_LAB_LNC_cc;
set  clarity_LAB_LNC_cc; 
length source $20;
format order_date screen_date date9.;
Source='Lab CC LNC';
order_date=datepart([ORDERING DATE]); 
screen_date=datepart([result time]);
if screen_date<'01JUL2011'd then delete;
if screen_date>'29FEB2020'd then delete;
run;


proc sql;
create table lab_LN_cc as
select * from clarity_LAB_LNC_cc
where [ORDER PROCEDURE IDENTIFIER] ^in(select distinct [ORDER PROCEDURE IDENTIFIER] from 
	clarity_LAB_components);
quit;


data lab_results;
set clarity_LAB_components lab_LN_cc;
run;


proc sql;
connect to odbc as mycon (required='dsn=research_dev;Trusted_Connection=yes');                                                             
create table clarity_LAB_LNC_or as select *
from connection to mycon (
select distinct pt.raw_patid,  
	r.[component identifier], 	
	zr.[result status], 
	zo.[order status],
	r.[ORDER PROCEDURE IDENTIFIER], 
	e.[procedure code], 
	e.[procedure name], 
	o.[ORDERING DATE], 
	r.[result time], 
	r.[order value], 
	r.[order number value], 
	r.[result flag], 
	zc.[result flag name],
	o.[patient encounter contact serial number identifier], 
	r.[component lnc identifier],
	ppc.screen
from CLARITY.dbo.[ORDER RESULTS] r
inner join CLARITY.dbo.[ORDER PROCEDURE] o on o.[ORDER PROCEDURE IDENTIFIER]=r.[ORDER PROCEDURE IDENTIFIER]
inner join research_dev.dbo.jpo_CVC_cohort  pt on pt.raw_patid=o.[patient identifier]
left join CLARITY.dbo.[LAB STATUS] zr on zr.[lab status]=r.[lab status]
left join CLARITY.dbo.[ORDER STATUS] zo on zo.[order status]=o.[order status] 
left join clarity.dbo.[result flag] zc on r.[result flag] = zc.[result flag]
left join CLARITY.dbo.[procedures records EAP] e on e.[procedure identifier]=o.[procedure identifier]
inner join research_dev.dbo.jpo_CVC_screens_lu AS ppc
on ppc.Loinc_code=r.[component lnc identifier] 
where ppc.loinc_code is not null);

data clarity_LAB_LNC_or;
set  clarity_LAB_LNC_or; 
length source $20;
format order_date screen_date date9.;
Source='Lab OR LNC';
order_date=datepart([ORDERING DATE]); 
screen_date=datepart([result time]);
if screen_date<'01JUL2011'd then delete;
if screen_date>'29FEB2020'd then delete;
run;


/*drop records already included in CVC_screen procs*/
proc sql;
create table new_results as
select * from lab_results
where [ORDER PROCEDURE IDENTIFIER] ^in
(select distinct [ORDER PROCEDURE IDENTIFIER] from CVC_screen_procs);


data ordered_screens;
set CVC_screen_procs new_results;
run;


data ordered_screens;
set ordered_screens;
if screen_date<order_date-60 then delete;
run;
/*combine orders*/
data orders;
set CVC_orders new_results;
run;

proc sql;
create table orders2 as
select o.*,
op.[ORDERING DATE] as ordering_date2
from orders o left join clarity.[ORDER PROCEDURE] op on
o.[ORDER PROCEDURE IDENTIFIER]=op.[ORDER PROCEDURE IDENTIFIER]
where (o.[ORDER PROCEDURE IDENTIFIER]^=. or o.order_date^=. );
quit;

data orders2;
set orders2;
if order_date=. then order_date=datepart(ordering_date2);
run;



/*limit to orders in study period*/
/*join to birthdate and delete HIV at <30 y*/
/*birthdates in ADV would miss changed patid*/
proc sql;
create table orders3 as
select o.*,
p.index_visit,
p.age_at_index_visit
from orders2 o left join scrap.cvc_patient_covariates p
on o.raw_patid=p.raw_patid;
quit;

data orders4;
set orders3;
age_at_order=age_at_index_visit+yrdif(index_visit,order_date,'age');
where order_date>='01JUL2016'd;
run;

data orders4;
set orders4;
if screen='HPV' then do;
	if age_at_order<29 then delete;
end;
if age_at_order>=65 then delete;
run;


/*pull from surgical history*/
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table CVC_shx as 
select * from connection to mycon (
     SELECT den.raw_patid	
  	,shx.[contact date] 
	,shx.[surgical history start date]
     ,shx.[SURGICAL_HISTORY DATE]
     ,cpc.screen
    FROM research_dev.dbo.jpo_CVC_cohort  AS den
     INNER JOIN clarity.dbo.SURGICAL_HX        AS shx
      ON den.raw_patID = shx.[patient identifier]
     INNER JOIN research_dev.dbo.jpo_CVC_screens_lu AS cpc
      ON shx.[procedure identifier] = cpc.[procedure identifier])
	;      
quit;
/*only cases where the exact date of the surgical procedure is known*/*/;

proc sql;
select count([ORDER PROCEDURE IDENTIFIER]), count(distinct [ORDER PROCEDURE IDENTIFIER]) from orders4;
*deduplicate;
proc sql;
create table orders5  as
select raw_patid,
screen,
order_date,
[ORDER PROCEDURE IDENTIFIER],
max(screen_date) as screen_date,
count([ORDER PROCEDURE IDENTIFIER]) as records,
count(distinct source) as sources
from orders4
group by raw_patid,
screen,
order_date,
[ORDER PROCEDURE IDENTIFIER];


data scrap.CVC_order_dates_new;
set orders5;
run;



/***********************************************************
merge to screen results
***********************************************************/

/*pull from surgical history*/
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table CVC_shx as 
select * from connection to mycon (
     SELECT den.raw_patid	
  	,shx.[contact date] 
	,shx.[surgical history start date]
     ,shx.[SURGICAL_HISTORY DATE]
     ,cpc.screen
    FROM research_dev.dbo.jpo_CVC_cohort  AS den
     INNER JOIN clarity.dbo.[SURGICAL HISTORY]        AS shx
      ON den.raw_patID = shx.[patient identifier]
     INNER JOIN research_dev.dbo.jpo_CVC_screens_lu AS cpc
      ON shx.[procedure identifier] = cpc.proc_id)
	;      
quit;
/*only cases where the exact date of the surgical procedure is known*/*/;

data CVC_SHX2 ;
	set CVC_SHX;
	length source $20;
	Source='Surgical Hx';
	month=0;
	day=0;
	format screen_date date9.;
	if [surgical history start date]^=. then
		do;
			screen_date=datepart([surgical history start date]);
			if screen_date>='01JUL2011'd and screen_date<='29FEB2020'd then 
				output CVC_SHX2;
		end;
	if [surgical history start date]=. then
		do;
			if surgical_hx_date^=' ' and surgical_hx_date^='NO REPORT' then
				do;
					if index(surgical_hx_date,'/')^=0 then
						do;
							month=input (scan(surgical_hx_date,1,'/'),best4.);
							day=input (scan(surgical_hx_date,2,'/'),best4.);
							year=input (scan(surgical_hx_date,3,'/'),best4.);
						end;
					if index(surgical_hx_date,'-')^=0 then
						do;
							month=input (scan(surgical_hx_date,1,'-'),best4.);
							day=input (scan(surgical_hx_date,2,'-'),best4.);
							year=input (scan(surgical_hx_date,3,'-'),best4.);
						end;
					if month>0 and month<13 and
						day>0 and day<32 and
						year>2005 and year<2022 then
							do;
								screen_date=mdy(month, day, year);
							end;
			if screen_date>='01JUL2011'd and screen_date<='29FEB2020'd then 
				output CVC_SHX2;
				end;
		end;
run;


/*Screens from HM_history*/
proc sql;
connect to odbc as mycon
(required = "dsn=OCHIN_HCN_FENWAY;Trusted_Connection=yes");
create table HMT_hx as select *
from connection to mycon (
SELECT 
	p.raw_patid
    ,hmt.[HEALTH MAINTENANCE TOPIC name]
	,hmo.[HEALTH MAINTENANCE TOPIC IDENTIFIER]
    ,hmo.[HEALTH MAINTENANCE HISTORY Date]
FROM 
	Clarity.dbo.HM_HISTORY hmo
		INNER JOIN research_dev.dbo.jpo_CVC_cohort p
			ON hmo.[patient identifier] = p.raw_PATID
		LEFT JOIN Clarity.dbo.CLARITY_HM_TOPIC hmt
			ON hmt.[HEALTH MAINTENANCE TOPIC IDENTIFIER] = hmo.[HEALTH MAINTENANCE TOPIC IDENTIFIER]
WHERE 
	
		(hmo.[HEALTH MAINTENANCE TOPIC IDENTIFIER] between 1 and 5 or hmo.[HEALTH MAINTENANCE TOPIC IDENTIFIER] IN (18,34,57)) 
	and hmo.HM_TYPE_C = 1 );


data HMT_HX2;
set HMT_HX;
length source $20 screen $5;
format screen_date date9.;
screen_date=datepart([HEALTH MAINTENANCE HISTORY Date]);
Source='HMT HX';
if screen_date<'01JUL2016'd then delete;
if screen_date>'29FEB2020'd then delete;
if [HEALTH MAINTENANCE TOPIC IDENTIFIER] in(1,2,3,4,5,18) then screen='PAP';
/*create separate records for PAP and HPV*/
if [HEALTH MAINTENANCE TOPIC IDENTIFIER] in(34,57) then do;
	screen='PAP'; output;
	screen='HPV';output;
end;
run;


/*combine screens with no [ORDER PROCEDURE IDENTIFIER];*/
data screens;
set CVC_shx2 HMT_HX2;
run;



/*deduplicate*/
proc sql;
create table CVC_screen_dates as
select raw_patid,
screen,
screen_date,
sum(source='HMT HX') as HMT_HX_records,
sum(source='Surgical Hx') as Surg_hx_records
from screens
group by raw_patid,
screen,
screen_date;

/*combine with prior pulled screen to ensure none are missing,
limit to PAP as that's actually what we are summarizing for the table*/
data PAP_screen_dates;
set cvc_screen_dates scrap.pap_screen_dates;
where screen in ('PAP','COTEST');
run;

/*add age at screen_date*/
/*limit to orders in study period*/
/*join to birthdate and delete HIV at <30 y*/
/*birthdates in ADV would miss changed patid*/
proc sql;
create table screens2 as
select o.*,
p.index_visit,
p.age_at_index_visit
from PAP_screen_dates o left join scrap.cvc_patient_covariates p
on o.raw_patid=p.raw_patid;
quit;

data screens3;
set screens2;
age_at_screen=age_at_index_visit+yrdif(index_visit,screen_date,'age');
where screen_date>='01JUL2016'd and screen_date<='29FEB2020'd;
run;

data screens4;
set screens3;
if age_at_screen<21 then delete;
if age_at_screen>=65 then delete;
run;



/******************************************************
Flag completed orders
******************************************************/
/*link orders to observation end*/
proc sql;
create table PAPorders2 as
select a.raw_patid,
a.first_due_date,
a.observation_end,
a.one_year_due,
o.[ORDER PROCEDURE IDENTIFIER], 
o.screen,
o.order_date
from scrap.cvc_analysis a inner join 
scrap.CVC_order_dates_new o
on a.raw_patid=o.raw_patid and 
o.order_date between a.first_due_date and a.one_year_due
where o.screen='PAP'
order by raw_patid, order_date;

proc sql;
create table PAP2 as
select o.*,
s.screen_date as result_date format date9.
from PAPorders2 o left join screens4 s
on s.raw_patid=o.raw_patid and s.screen_date between o.order_date and o.observation_end;
proc sort data=pap2 out=pap3;
by raw_patid order_date result_date;
run;
data single multiple;
set pap3;
by raw_patid;
if first.raw_patid and last.raw_patid then output single;
if ^(first.raw_patid and last.raw_patid) then output single;
run;*no multiple;


/*combine screen_date from [ORDER PROCEDURE] and result_dates from unlinked screens*/
data PAP2;
set PAP2;
format completion_date screen_date date9.;
completion_date=screen_date;
if result_date^=. then do;
	if screen_date=. or screen_date<order_date then completion_date=result_date;
	if ^(screen_date=. or screen_date<order_date) and 
		result_date<screen_date then completion_date=result_date;
end;
run;







/************************************************************
code new outcome variables, summarize by patient
documented_PAP_1yr
documented_HPV_1yr
documented_order_1yr
completed_PAP_1yr
completed_HPV
***********************************************************/
data pap3;
set pap2;
documented_PAP_1yr=1;
completed_PAP=(screen='PAP' and completion_date^=. and 
	completion_date<=one_year_due);
run;



proc sql;
create table order_summary as
select raw_patid,
max(documented_PAP_1yr) as documented_PAP_1yr,
max(completed_PAP) as completed_PAP_1yr
from pap3
group by raw_patid
order by raw_patid, order_date;


 proc sql;
create table analysis as
select a.*,
max(0,o.documented_PAP_1yr) as documented_PAP_1yr,
max(0,o.completed_PAP_1yr) as completed_PAP_1yr
from scrap.CVC_ANALYSIS a left join order_summary o
on o.raw_patid=a.raw_patid;


data SCRAP.CVC_analysis_09122023;
set analysis;
run;

data analysis;
set SCRAP.CVC_analysis_09122023;
run;
proc sort nodup data=SCRAP.CVC_analysis_09122023 out=analysis dupout=wtf;
by raw_patid;run;

proc sql;
create table dups as select * from SCRAP.CVC_analysis_09122023
where raw_patid in(select distinct raw_patid from wtf)
order by raw_patid;
proc sql;
select count(raw_patid), count(distinct raw_patid) from analysis;
run;/*distinct now*/